from csdn_word_split import csdn_cut_words
from csdn_word_to_vecotr import csdn_word2vector, reload_csdn_model
from spider_csdn_datas import spider_csdn

def main():
    """Run the CSDN pipeline end to end: crawl, segment, train, smoke-test.

    Each stage reads the directory written by the previous one, so the
    directory names are defined once and shared between stages.
    """
    # Imported locally so this block stays self-contained.
    import os

    # Stage 1: crawl CSDN articles into `spider_dir`.
    # NOTE(review): the meaning of the second argument (3) is defined by
    # spider_csdn -- presumably a page/iteration count; confirm there.
    spider_dir = 'csdn_spider_datas'
    spider_csdn(spider_dir, 3)

    # Stage 2: segment the crawled text into words, using a stop-word file.
    cut_dir = 'csdn_cut_datas'
    # Build the path with os.path.join instead of a hard-coded backslash so
    # the script also works on non-Windows systems (on Windows the result is
    # still 'csdn_stop_words\stopwords.txt').
    stop_words_path = os.path.join('csdn_stop_words', 'stopwords.txt')
    csdn_cut_words(spider_dir, cut_dir, stop_words_path=stop_words_path)

    # Stage 3: build the word-vector model from the segmented data and save it.
    model_dir = 'csdn_w2v_model'
    model = csdn_word2vector(cut_dir, model_dir)

    # Smoke test: print the vector for a sample word.
    # NOTE(review): if `model` is a gensim Word2Vec, direct indexing was
    # removed in gensim 4 and vectors live on `.wv`; fall back to the object
    # itself when it has no `.wv` (e.g. it is already a KeyedVectors).
    vectors = getattr(model, 'wv', model)
    print(vectors['文件'])


if __name__ == '__main__':
    main()